library(tidyverse)
library(GGally)
library(UpSetR)
library(pheatmap)
library(cowplot)
data = 'Replogle_K562'
# Varying UMI threshold
n_assigned_cells_UMI <- read_csv(paste0(data_dir, 'guide_calling/', data, '/UMI/',
'all_assignments.csv'), show_col_types = FALSE)
n_assigned_cells_UMI_single <- read_csv(paste0(data_dir, 'guide_calling/', data, '/UMI/',
'single_assignments.csv'), show_col_types = FALSE)
# Varying ratio threshold
n_assigned_cells_ratios <- read_csv(paste0(data_dir, 'guide_calling/', data, '/ratios/',
'all_assignments.csv'), show_col_types = FALSE)
n_assigned_cells_ratios_single <- read_csv(paste0(data_dir, 'guide_calling/', data, '/ratios/',
'single_assignments.csv'), show_col_types = FALSE)
# Varying quantile threshold
n_assigned_cells_q <- read_csv(paste0(data_dir, 'guide_calling/', data, '/quantiles/',
'all_assignments.csv'), show_col_types = FALSE)
n_assigned_cells_q_single <- read_csv(paste0(data_dir, 'guide_calling/', data, '/quantiles/',
'single_assignments.csv'), show_col_types = FALSE)
# add vertical lines for chosen thresholds
UMI_t = 15
ratio_t = 30
quantile_t = 5
all_t <- data.frame(assignment = c('UMI_t', 'ratio_X%', 'top_X%_cells'),
threshold = c(UMI_t, ratio_t, quantile_t))
get_power_check_results <- function(data, name, method_names){
method_name = method_names[method_names$dir_name == name, 'method']
df = readRDS(paste0(data_dir, 'sceptre_pipeline/', data, '/power_check/', name,
'/results_run_power_check.rds')) %>%
filter(pass_qc == TRUE) %>%
mutate(method = method_name)
df <- df %>% mutate(p_adj = p.adjust(df$p_value, method="BH"))
return(df)
}
# Varying UMI threshold
dir_names_UMI = c("UMI/t2", "UMI/t5", "UMI/t10", "UMI/t15", "UMI/t20",
"UMI/t30", "UMI/t40", "UMI/t50", "UMI/t60", "UMI/t80",
"UMI/t100", "UMI/t120", "UMI/t140", "UMI/t160", "UMI/t180",
"UMI/t200")
method_names_UMI <- data.frame('method' = c(2,5,10,15,20,30,40,50,60,80,100,120,140,160,180,200),
'dir_name' = dir_names_UMI)
power_check_UMI <- lapply(dir_names_UMI, function(name) {get_power_check_results(data, name, method_names_UMI)}) %>%
bind_rows()
# Varying ratio threshold
dir_names_ratio = c("ratios/t0.1", "ratios/t0.2", "ratios/t0.3", "ratios/t0.4", "ratios/t0.5", "ratios/t0.6",
"ratios/t0.7", "ratios/t0.8", "ratios/t0.9")
method_names_ratio <- data.frame('method' = c(10, 20, 30, 40, 50, 60, 70, 80, 90),
'dir_name' = dir_names_ratio)
power_check_ratios <- lapply(dir_names_ratio, function(name) {get_power_check_results(data, name, method_names_ratio)}) %>%
bind_rows()
# Varying quantile threshold
dir_names_q = c("quantiles/q0.01", "quantiles/q0.025", "quantiles/q0.05", "quantiles/q0.075", "quantiles/q0.1",
"quantiles/q0.2", "quantiles/q0.3", "quantiles/q0.4", "quantiles/q0.5")
method_names_q <- data.frame('method' = c(1, 2.5, 5, 7.5, 10, 20, 30, 40, 50),
'dir_name' = dir_names_q)
power_check_quantiles <- lapply(dir_names_q, function(name) {get_power_check_results(data, name, method_names_q)}) %>%
bind_rows()
get_calibration_results <- function(data, name, method_names){
method_name = method_names[method_names$dir_name == name, 'method']
df = readRDS(paste0(data_dir, 'sceptre_pipeline/', data, '/discovery_analysis/', name,
'/results_run_calibration_check.rds')) %>%
filter(pass_qc == TRUE) %>%
mutate(method = method_name)
df <- df %>% mutate(p_adj = p.adjust(df$p_value, method="BH"))
return(df)
}
# Varying UMI threshold
calibration_UMI <- lapply(dir_names_UMI, function(name) {get_calibration_results(data, name, method_names_UMI)}) %>%
bind_rows() %>%
group_by(method) %>%
mutate(total_tests = n()) %>%
filter(significant == TRUE) %>%
group_by(method, total_tests) %>%
summarize(false_discoveries = n()) %>%
right_join(method_names_UMI) %>%
replace(is.na(.), 0)
## `summarise()` has grouped output by 'method'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(method)`
# Varying ratio threshold
calibration_ratios <- lapply(dir_names_ratio, function(name) {get_calibration_results(data, name, method_names_ratio)}) %>%
bind_rows() %>%
group_by(method) %>%
mutate(total_tests = n()) %>%
filter(significant == TRUE) %>%
group_by(method, total_tests) %>%
summarize(false_discoveries = n()) %>%
right_join(method_names_ratio) %>%
replace(is.na(.), 0)
## `summarise()` has grouped output by 'method'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(method)`
# Varying quantile threshold
calibration_quantiles <- lapply(dir_names_q, function(name) {get_calibration_results(data, name, method_names_q)}) %>%
bind_rows() %>%
group_by(method) %>%
mutate(total_tests = n()) %>%
filter(significant == TRUE) %>%
group_by(method, total_tests) %>%
summarize(false_discoveries = n()) %>%
right_join(method_names_q) %>%
replace(is.na(.), 0)
## `summarise()` has grouped output by 'method'. You can override using the
## `.groups` argument.
## Joining with `by = join_by(method)`
get_discovery_analysis_results <- function(data, name, method_names){
method_name = method_names[method_names$dir_name == name, 'method']
df <- readRDS(paste0(data_dir, 'sceptre_pipeline/', data, '/discovery_analysis/', name,
'/results_run_discovery_analysis.rds')) %>%
filter(pass_qc == TRUE, significant == TRUE) %>%
mutate(method = method_name)
}
# varying UMI threshold
discovery_analysis_UMI <- lapply(dir_names_UMI, function(name) {get_discovery_analysis_results(data, name, method_names_UMI)}) %>%
bind_rows()
# varying ratio threshold
discovery_analysis_ratios <- lapply(dir_names_ratio, function(name) {get_discovery_analysis_results(data, name, method_names_ratio)}) %>%
bind_rows()
# varying quantile threshold
discovery_analysis_q <- lapply(dir_names_q, function(name) {get_discovery_analysis_results(data, name, method_names_q)}) %>%
bind_rows()
u1 <- ggplot(n_assigned_cells_UMI, aes(x = method, y = n_cells)) +
geom_vline(xintercept = UMI_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2, shape = 4) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'UMI threshold', y = 'Total number of assigned cells')
u1
u2 <- ggplot(n_assigned_cells_UMI_single, aes(x = method, y = n_cells)) +
geom_vline(xintercept = UMI_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'UMI threshold', y = 'Number of uniquely assigned cells')
u2
r1 <- ggplot(n_assigned_cells_ratios, aes(x = method, y = n_cells)) +
geom_vline(xintercept = ratio_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2, shape = 4) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Ratio threshold', y = 'Total number of assigned cells')
r1
r2 <- ggplot(n_assigned_cells_ratios_single, aes(x = method, y = n_cells)) +
geom_vline(xintercept = ratio_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Ratio threshold', y = 'Number of uniquely assigned cells')
r2
q1 <- ggplot(n_assigned_cells_q, aes(x = method, y = n_cells)) +
geom_vline(xintercept = quantile_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2, shape = 4) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Quantile threshold', y = 'Total number of assigned cells')
q1
q2 <- ggplot(n_assigned_cells_q_single, aes(x = method, y = n_cells)) +
geom_vline(xintercept = quantile_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Quantile threshold', y = 'Number of uniquely assigned cells')
q2
n_assigned_cells <- n_assigned_cells_UMI %>%
mutate(assignment = 'UMI_t') %>%
bind_rows(n_assigned_cells_ratios %>% mutate(assignment = 'ratio_X%')) %>%
bind_rows(n_assigned_cells_q %>% mutate(assignment = "top_X%_cells"))
row1 <- ggplot(n_assigned_cells, aes(x = method, y = n_cells)) +
facet_wrap(~assignment, nrow = 1, scales = "free_x") +
geom_point(color = '#3776ab', size = 2, shape = 4) +
geom_line(color = '#3776ab') +
geom_vline(data = all_t, aes(xintercept = threshold), linetype="dashed") +
theme_bw() +
labs(x = 'Threshold', y = 'Total number of assigned cells')
n_single_assigned_cells <- n_assigned_cells_UMI_single %>%
mutate(assignment = 'UMI_t') %>%
bind_rows(n_assigned_cells_ratios_single %>% mutate(assignment = 'ratio_X%')) %>%
bind_rows(n_assigned_cells_q_single %>% mutate(assignment = "top_X%_cells"))
row2 <- ggplot(n_single_assigned_cells, aes(x = method, y = n_cells)) +
facet_wrap(~assignment, nrow = 1, scales = "free_x") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
geom_vline(data = all_t, aes(xintercept = threshold), linetype="dashed") +
theme_bw() +
labs(x = 'Threshold', y = 'Number of uniquely\nassigned cells')
# Number of significantly downregulated target genes
signif_targets_UMI <- power_check_UMI %>% filter(p_adj < 0.05) %>%
group_by(method) %>%
summarize(signif_targets = n())
u3 <- ggplot(signif_targets_UMI, aes(x = method, y = signif_targets)) +
geom_vline(xintercept = UMI_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'UMI threshold', y = 'Number of significant target genes')
u3
# Median lfc over targets that pass qc for all thresholds
all_targets <- group_by(power_check_UMI, grna_target) %>% summarize(pass_QC = n()) %>% filter(pass_QC == 16)
lfc_UMI <- filter(power_check_UMI, grna_target %in% all_targets$grna_target) %>%
group_by(method) %>%
summarize(median_lfc = median(-log_2_fold_change))
u4 <- ggplot(lfc_UMI, aes(x = method, y = median_lfc)) +
geom_vline(xintercept = UMI_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'UMI threshold', y = 'Median negative log2 fold\nchange of the target gene')
u4
# Number of significantly downregulated target genes
signif_targets_ratios <- power_check_ratios %>% filter(p_adj < 0.05) %>%
group_by(method) %>%
summarize(signif_targets = n())
r3 <- ggplot(signif_targets_ratios, aes(x = method, y = signif_targets)) +
geom_vline(xintercept = ratio_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Ratio threshold', y = 'Number of significant target genes')
r3
# Median lfc over targets that pass qc for all thresholds
all_targets <- group_by(power_check_ratios, grna_target) %>% summarize(pass_QC = n()) %>% filter(pass_QC == 9)
lfc_ratios <- filter(power_check_ratios, grna_target %in% all_targets$grna_target) %>%
group_by(method) %>%
summarize(median_lfc = median(-log_2_fold_change))
r4 <- ggplot(lfc_ratios, aes(x = method, y = median_lfc)) +
geom_vline(xintercept = ratio_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Ratio threshold', y = 'Median negative log2 fold\nchange of the target gene')
r4
# Number of significantly downregulated target genes
signif_targets_quantiles <- power_check_quantiles %>% filter(p_adj < 0.05) %>%
group_by(method) %>%
summarize(signif_targets = n())
q3 <- ggplot(signif_targets_quantiles, aes(x = method, y = signif_targets)) +
geom_vline(xintercept = quantile_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Quantile threshold', y = 'Number of significant target genes')
q3
# Median lfc over targets that pass qc for all thresholds
all_targets <- group_by(power_check_quantiles, grna_target) %>% summarize(pass_QC = n()) %>% filter(pass_QC == 9)
lfc_q <- filter(power_check_quantiles, grna_target %in% all_targets$grna_target) %>%
group_by(method) %>%
summarize(median_lfc = median(-log_2_fold_change, na.rm = TRUE))
q4 <- ggplot(lfc_q, aes(x = method, y = median_lfc)) +
geom_vline(xintercept = quantile_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Quantile threshold', y = 'Median negative log2 fold\nchange of the target gene')
q4
signif_targets <- signif_targets_UMI %>%
mutate(assignment = 'UMI_t') %>%
bind_rows(signif_targets_ratios %>% mutate(assignment = 'ratio_X%')) %>%
bind_rows(signif_targets_quantiles %>% mutate(assignment = "top_X%_cells"))
row3 <- ggplot(signif_targets, aes(x = method, y = signif_targets)) +
facet_wrap(~assignment, nrow = 1, scales = "free_x") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
geom_vline(data = all_t, aes(xintercept = threshold), linetype="dashed") +
theme_bw() +
labs(x = 'Threshold', y = 'Number of significant\ntarget genes')
lfc <- lfc_UMI %>%
mutate(assignment = 'UMI_t') %>%
bind_rows(lfc_ratios %>% mutate(assignment = 'ratio_X%')) %>%
bind_rows(lfc_q %>% mutate(assignment = "top_X%_cells"))
row4 <- ggplot(lfc, aes(x = method, y = median_lfc)) +
facet_wrap(~assignment, nrow = 1, scales = "free_x") +
geom_vline(data = all_t, aes(xintercept = threshold), linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Threshold', y = 'Median negative log2 fold\nchange of the target gene')
# Get the number of response genes per perturbation
n_discovery_UMI <- discovery_analysis_UMI %>%
group_by(method) %>%
summarize(n_response_genes = n())
u5 <- ggplot(n_discovery_UMI, aes(x = method, y = n_response_genes)) +
geom_vline(xintercept = UMI_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'UMI threshold', y = 'Number of total discoveries')
u5
u6 <- ggplot(calibration_UMI, aes(x = factor(method), y = false_discoveries)) +
geom_bar(stat = 'identity', fill = '#3776ab') +
theme_bw() +
labs(x = 'UMI threshold', y = paste0('Number of false discoveries'))
u6
# Get the number of response genes per perturbation
n_discovery_ratios <- discovery_analysis_ratios %>%
group_by(method) %>%
summarize(n_response_genes = n())
r5 <- ggplot(n_discovery_ratios, aes(x = method, y = n_response_genes)) +
geom_vline(xintercept = ratio_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Ratio threshold', y = 'Number of total discoveries')
r5
r6 <- ggplot(calibration_ratios, aes(x = factor(method), y = false_discoveries)) +
geom_bar(stat = 'identity', fill = '#3776ab') +
theme_bw() +
labs(x = 'Ratio threshold', y = paste0('Number of false discoveries'))
r6
# Get the number of response genes per perturbation
n_discovery_q <- discovery_analysis_q %>%
group_by(method) %>%
summarize(n_response_genes = n())
q5 <- ggplot(n_discovery_q, aes(x = method, y = n_response_genes)) +
geom_vline(xintercept = quantile_t, linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Quantile threshold', y = 'Number of total discoveries')
q5
q6 <- ggplot(calibration_quantiles, aes(x = factor(method), y = false_discoveries)) +
geom_bar(stat = 'identity', fill = '#3776ab') +
theme_bw() +
labs(x = 'Quantile threshold', y = paste0('Number of false discoveries'))
q6
n_discovery <- n_discovery_UMI %>%
mutate(assignment = 'UMI_t') %>%
bind_rows(n_discovery_ratios %>% mutate(assignment = 'ratio_X%')) %>%
bind_rows(n_discovery_q %>% mutate(assignment = "top_X%_cells"))
row5 <- ggplot(n_discovery, aes(x = method, y = n_response_genes)) +
facet_wrap(~assignment, nrow = 1, scales = "free_x") +
geom_vline(data = all_t, aes(xintercept = threshold), linetype="dashed") +
geom_point(color = '#3776ab', size = 2) +
geom_line(color = '#3776ab') +
theme_bw() +
labs(x = 'Threshold', y = 'Number of discoveries')
calibration <- calibration_UMI %>%
mutate(assignment = 'UMI_t') %>%
bind_rows(calibration_ratios %>% mutate(assignment = 'ratio_X%')) %>%
bind_rows(calibration_quantiles %>% mutate(assignment = "top_X%_cells"))
row6 <- ggplot(calibration, aes(x = factor(method), y = false_discoveries)) +
facet_wrap(~assignment, nrow = 1, scales = "free_x") +
geom_bar(stat = 'identity', fill = '#3776ab') +
theme_bw() +
labs(x = 'Threshold', y = paste0('Number of false discoveries'))
plot_grid(row1, row2, row4, row3, row5, row6, ncol = 1, labels = "AUTO")
plot_grid(u1, u2, u4, u3, u5, u6, r1, r2, r4, r3, r5, r6, q1, q2, q4, q3, q5, q6,
labels = c("A(i)", "(ii)", "(iii)", "(iv)", "(v)", "(vi)",
"B(i)", "(ii)", "(iii)", "(iv)", "(v)", "(vi)",
"C(i)", "(ii)", "(iii)", "(iv)", "(v)", "(vi)"),
ncol = 6)
plot_grid(u1, r1, q1, u2, r2, q2, u4, r4, q4, u3, r3, q3, u5, r5, q5, u6, r6, q6,
labels = c("A(i)", "(ii)", "(iii)",
"B(i)", "(ii)", "(iii)",
"C(i)", "(ii)", "(iii)",
"D(i)", "(ii)", "(iii)",
"E(i)", "(ii)", "(iii)",
"F(i)", "(ii)", "(iii)"),
ncol = 3)
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.2.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Berlin
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] cowplot_1.1.3 pheatmap_1.0.12 UpSetR_1.4.0 GGally_2.2.1
## [5] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [9] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [13] ggplot2_3.5.1 tidyverse_2.0.0 BiocStyle_2.30.0
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.9 utf8_1.2.4 generics_0.1.3
## [4] stringi_1.8.4 hms_1.1.3 digest_0.6.36
## [7] magrittr_2.0.3 evaluate_0.24.0 grid_4.3.2
## [10] timechange_0.3.0 RColorBrewer_1.1-3 bookdown_0.40
## [13] fastmap_1.2.0 plyr_1.8.9 jsonlite_1.8.8
## [16] tinytex_0.52 gridExtra_2.3 BiocManager_1.30.23
## [19] fansi_1.0.6 scales_1.3.0 jquerylib_0.1.4
## [22] cli_3.6.3 crayon_1.5.3 rlang_1.1.4
## [25] bit64_4.0.5 munsell_0.5.1 withr_3.0.0
## [28] cachem_1.1.0 yaml_2.3.9 parallel_4.3.2
## [31] tools_4.3.2 tzdb_0.4.0 colorspace_2.1-0
## [34] ggstats_0.6.0 vctrs_0.6.5 R6_2.5.1
## [37] lifecycle_1.0.4 bit_4.0.5 vroom_1.6.5
## [40] pkgconfig_2.0.3 pillar_1.9.0 bslib_0.7.0
## [43] gtable_0.3.5 data.table_1.15.4 Rcpp_1.0.13
## [46] glue_1.7.0 highr_0.11 xfun_0.46
## [49] tidyselect_1.2.1 rstudioapi_0.16.0 knitr_1.48
## [52] farver_2.1.2 htmltools_0.5.8.1 labeling_0.4.3
## [55] rmarkdown_2.27 compiler_4.3.2